{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "34be2ef1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import warnings \n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "# Silence library warnings so they do not clutter cell output.\n",
    "warnings.filterwarnings(\"ignore\")\n",
    "\n",
    "# Location of the order-detail CSV export. Set the SALES_ORDER_CSV environment\n",
    "# variable to load a copy stored elsewhere; the fallback is the original local path.\n",
    "csv_path = os.environ.get(\n",
    "    \"SALES_ORDER_CSV\",\n",
    "    \"/Users/collinsliu/learning/jobs/self advance/course/sales_order_detail.csv\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "43ec15e2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Index(['event_time', 'order_id', 'product_id', 'category_id', 'category_code',\n",
       "       'brand', 'price', 'user_id', 'age', 'sex', 'local'],\n",
       "      dtype='object')"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the raw order-detail export; the first CSV column is used as the row index.\n",
    "large_df = pd.read_csv(csv_path, encoding=\"utf-8\", index_col=0)\n",
    "large_df.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "71b1c838",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>event_time</th>\n",
       "      <th>category_code</th>\n",
       "      <th>user_id</th>\n",
       "      <th>age</th>\n",
       "      <th>sex</th>\n",
       "      <th>local</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>2020-04-24 11:50:39 UTC</td>\n",
       "      <td>electronics.tablet</td>\n",
       "      <td>1515915625441990000</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>海南</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2020-04-24 11:50:39 UTC</td>\n",
       "      <td>electronics.tablet</td>\n",
       "      <td>1515915625441990000</td>\n",
       "      <td>24</td>\n",
       "      <td>女</td>\n",
       "      <td>海南</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2020-04-24 14:37:43 UTC</td>\n",
       "      <td>electronics.audio.headphone</td>\n",
       "      <td>1515915625447870000</td>\n",
       "      <td>38</td>\n",
       "      <td>女</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2020-04-24 14:37:43 UTC</td>\n",
       "      <td>electronics.audio.headphone</td>\n",
       "      <td>1515915625447870000</td>\n",
       "      <td>38</td>\n",
       "      <td>女</td>\n",
       "      <td>北京</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2020-04-24 19:16:21 UTC</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1515915625443140000</td>\n",
       "      <td>32</td>\n",
       "      <td>女</td>\n",
       "      <td>广东</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                event_time                category_code              user_id  \\\n",
       "0  2020-04-24 11:50:39 UTC           electronics.tablet  1515915625441990000   \n",
       "1  2020-04-24 11:50:39 UTC           electronics.tablet  1515915625441990000   \n",
       "2  2020-04-24 14:37:43 UTC  electronics.audio.headphone  1515915625447870000   \n",
       "3  2020-04-24 14:37:43 UTC  electronics.audio.headphone  1515915625447870000   \n",
       "4  2020-04-24 19:16:21 UTC                          NaN  1515915625443140000   \n",
       "\n",
       "   age sex local  \n",
       "0   24   女    海南  \n",
       "1   24   女    海南  \n",
       "2   38   女    北京  \n",
       "3   38   女    北京  \n",
       "4   32   女    广东  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Keep only the columns used by the user-activity analysis below.\n",
    "dau_columns = [\"event_time\", \"category_code\", \"user_id\", \"age\", \"sex\", \"local\"]\n",
    "dau_df = large_df[dau_columns]\n",
    "dau_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "51d0244c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>user_id</th>\n",
       "      <th>age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>5.641690e+05</td>\n",
       "      <td>564169.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>33.184388</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.379042e+07</td>\n",
       "      <td>10.122088</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>16.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>24.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>33.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>42.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>1.515916e+18</td>\n",
       "      <td>50.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            user_id            age\n",
       "count  5.641690e+05  564169.000000\n",
       "mean   1.515916e+18      33.184388\n",
       "std    2.379042e+07      10.122088\n",
       "min    1.515916e+18      16.000000\n",
       "25%    1.515916e+18      24.000000\n",
       "50%    1.515916e+18      33.000000\n",
       "75%    1.515916e+18      42.000000\n",
       "max    1.515916e+18      50.000000"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics for the numeric columns (user_id is an identifier, so only\n",
    "# the age figures are meaningful).\n",
    "dau_df.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "e6214402",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "event_time       object\n",
       "category_code    object\n",
       "user_id           int64\n",
       "age               int64\n",
       "sex              object\n",
       "local            object\n",
       "dtype: object"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datetime import datetime\n",
    "\n",
    "dformat = format(\"%Y-%m-%d %H:%M:%S %Z\")\n",
    "dau_df[\"event_time\"].apply(lambda date: datetime.strptime(date,dformat))\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "491745f5",
   "metadata": {},
   "source": [
    "# DAU (Daily Active Users)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "807da691",
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "Invalid comparison between dtype=datetime64[ns, UTC] and datetime",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py:536\u001b[0m, in \u001b[0;36mDatetimeLikeArrayMixin._validate_comparison_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    535\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 536\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_check_compatible_with\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    537\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, IncompatibleFrequency) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m    538\u001b[0m     \u001b[38;5;66;03m# e.g. tzawareness mismatch\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:508\u001b[0m, in \u001b[0;36mDatetimeArray._check_compatible_with\u001b[0;34m(self, other, setitem)\u001b[0m\n\u001b[1;32m    507\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m--> 508\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_assert_tzawareness_compat\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    509\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m setitem:\n\u001b[1;32m    510\u001b[0m     \u001b[38;5;66;03m# Stricter check for setitem vs comparison methods\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimes.py:715\u001b[0m, in \u001b[0;36mDatetimeArray._assert_tzawareness_compat\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    714\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m other_tz \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 715\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\n\u001b[1;32m    716\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot compare tz-naive and tz-aware datetime-like objects\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    717\u001b[0m     )\n",
      "\u001b[0;31mTypeError\u001b[0m: Cannot compare tz-naive and tz-aware datetime-like objects",
      "\nThe above exception was the direct cause of the following exception:\n",
      "\u001b[0;31mInvalidComparison\u001b[0m                         Traceback (most recent call last)",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py:1008\u001b[0m, in \u001b[0;36mDatetimeLikeArrayMixin._cmp_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m   1007\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1008\u001b[0m     other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_comparison_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1009\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidComparison:\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py:539\u001b[0m, in \u001b[0;36mDatetimeLikeArrayMixin._validate_comparison_value\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m    537\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, IncompatibleFrequency) \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[1;32m    538\u001b[0m         \u001b[38;5;66;03m# e.g. tzawareness mismatch\u001b[39;00m\n\u001b[0;32m--> 539\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m InvalidComparison(other) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01merr\u001b[39;00m\n\u001b[1;32m    541\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_list_like(other):\n",
      "\u001b[0;31mInvalidComparison\u001b[0m: 2000-01-01 00:00:00",
      "\nDuring handling of the above exception, another exception occurred:\n",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m mask \u001b[38;5;241m=\u001b[39m \u001b[43mdau_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mevent_time\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgt\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdatetime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstrptime\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2000-01-01 00:00:00\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mY-\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mm-\u001b[39;49m\u001b[38;5;132;43;01m%d\u001b[39;49;00m\u001b[38;5;124;43m \u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mH:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mM:\u001b[39;49m\u001b[38;5;124;43m%\u001b[39;49m\u001b[38;5;124;43mS\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m      2\u001b[0m dau_df \u001b[38;5;241m=\u001b[39m dau_df\u001b[38;5;241m.\u001b[39mloc[mask]\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/__init__.py:197\u001b[0m, in \u001b[0;36mflex_method_SERIES.<locals>.flex_wrapper\u001b[0;34m(self, other, level, fill_value, axis)\u001b[0m\n\u001b[1;32m    194\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fill_value \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    195\u001b[0m     \u001b[38;5;28mself\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfillna(fill_value)\n\u001b[0;32m--> 197\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/common.py:70\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer.<locals>.new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     66\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m     68\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arraylike.py:56\u001b[0m, in \u001b[0;36mOpsMixin.__gt__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     54\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__gt__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__gt__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m---> 56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cmp_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgt\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/series.py:5623\u001b[0m, in \u001b[0;36mSeries._cmp_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m   5620\u001b[0m rvalues \u001b[38;5;241m=\u001b[39m extract_array(other, extract_numpy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, extract_range\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m   5622\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m np\u001b[38;5;241m.\u001b[39merrstate(\u001b[38;5;28mall\u001b[39m\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m-> 5623\u001b[0m     res_values \u001b[38;5;241m=\u001b[39m \u001b[43mops\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcomparison_op\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   5625\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_construct_result(res_values, name\u001b[38;5;241m=\u001b[39mres_name)\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/array_ops.py:269\u001b[0m, in \u001b[0;36mcomparison_op\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m    260\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    261\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLengths must match to compare\u001b[39m\u001b[38;5;124m\"\u001b[39m, lvalues\u001b[38;5;241m.\u001b[39mshape, rvalues\u001b[38;5;241m.\u001b[39mshape\n\u001b[1;32m    262\u001b[0m         )\n\u001b[1;32m    264\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m should_extension_dispatch(lvalues, rvalues) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m    265\u001b[0m     (\u001b[38;5;28misinstance\u001b[39m(rvalues, (Timedelta, BaseOffset, Timestamp)) \u001b[38;5;129;01mor\u001b[39;00m right \u001b[38;5;129;01mis\u001b[39;00m NaT)\n\u001b[1;32m    266\u001b[0m     \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_object_dtype(lvalues\u001b[38;5;241m.\u001b[39mdtype)\n\u001b[1;32m    267\u001b[0m ):\n\u001b[1;32m    268\u001b[0m     \u001b[38;5;66;03m# Call the method on lvalues\u001b[39;00m\n\u001b[0;32m--> 269\u001b[0m     res_values \u001b[38;5;241m=\u001b[39m \u001b[43mop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrvalues\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    271\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_scalar(rvalues) \u001b[38;5;129;01mand\u001b[39;00m isna(rvalues):  \u001b[38;5;66;03m# TODO: but not pd.NA?\u001b[39;00m\n\u001b[1;32m    272\u001b[0m     \u001b[38;5;66;03m# numpy does not like comparisons vs None\u001b[39;00m\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m op \u001b[38;5;129;01mis\u001b[39;00m operator\u001b[38;5;241m.\u001b[39mne:\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/common.py:70\u001b[0m, in \u001b[0;36m_unpack_zerodim_and_defer.<locals>.new_method\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     66\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mNotImplemented\u001b[39m\n\u001b[1;32m     68\u001b[0m other \u001b[38;5;241m=\u001b[39m item_from_zerodim(other)\n\u001b[0;32m---> 70\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arraylike.py:56\u001b[0m, in \u001b[0;36mOpsMixin.__gt__\u001b[0;34m(self, other)\u001b[0m\n\u001b[1;32m     54\u001b[0m \u001b[38;5;129m@unpack_zerodim_and_defer\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m__gt__\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     55\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__gt__\u001b[39m(\u001b[38;5;28mself\u001b[39m, other):\n\u001b[0;32m---> 56\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_cmp_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43moperator\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgt\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/datetimelike.py:1010\u001b[0m, in \u001b[0;36mDatetimeLikeArrayMixin._cmp_method\u001b[0;34m(self, other, op)\u001b[0m\n\u001b[1;32m   1008\u001b[0m     other \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_comparison_value(other)\n\u001b[1;32m   1009\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidComparison:\n\u001b[0;32m-> 1010\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minvalid_comparison\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mother\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mop\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1012\u001b[0m dtype \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(other, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdtype\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m   1013\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_object_dtype(dtype):\n\u001b[1;32m   1014\u001b[0m     \u001b[38;5;66;03m# We have to use comp_method_OBJECT_ARRAY instead of numpy\u001b[39;00m\n\u001b[1;32m   1015\u001b[0m     \u001b[38;5;66;03m#  comparison otherwise it would fail to raise when\u001b[39;00m\n\u001b[1;32m   1016\u001b[0m     \u001b[38;5;66;03m#  comparing tz-aware and tz-naive\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/anaconda3/lib/python3.9/site-packages/pandas/core/ops/invalid.py:34\u001b[0m, in \u001b[0;36minvalid_comparison\u001b[0;34m(left, right, op)\u001b[0m\n\u001b[1;32m     32\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     33\u001b[0m     typ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mtype\u001b[39m(right)\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\n\u001b[0;32m---> 34\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mInvalid comparison between dtype=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mleft\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m and \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mtyp\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     35\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res_values\n",
      "\u001b[0;31mTypeError\u001b[0m: Invalid comparison between dtype=datetime64[ns, UTC] and datetime"
     ]
    }
   ],
   "source": [
    "mask = dau_df[\"event_time\"].gt(datetime.strptime(\"2000-01-01 00:00:00\",format(\"%Y-%m-%d %H:%M:%S\")))\n",
    "dau_df = dau_df.loc[mask]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "ceb98788",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1970-01-31 00:00:00+00:00\n",
      "1970-02-28 00:00:00+00:00\n",
      "1970-03-31 00:00:00+00:00\n",
      "1970-04-30 00:00:00+00:00\n",
      "1970-05-31 00:00:00+00:00\n",
      "1970-06-30 00:00:00+00:00\n",
      "1970-07-31 00:00:00+00:00\n",
      "1970-08-31 00:00:00+00:00\n",
      "1970-09-30 00:00:00+00:00\n",
      "1970-10-31 00:00:00+00:00\n",
      "1970-11-30 00:00:00+00:00\n",
      "1970-12-31 00:00:00+00:00\n",
      "1971-01-31 00:00:00+00:00\n",
      "1971-02-28 00:00:00+00:00\n",
      "1971-03-31 00:00:00+00:00\n",
      "1971-04-30 00:00:00+00:00\n",
      "1971-05-31 00:00:00+00:00\n",
      "1971-06-30 00:00:00+00:00\n",
      "1971-07-31 00:00:00+00:00\n",
      "1971-08-31 00:00:00+00:00\n",
      "1971-09-30 00:00:00+00:00\n",
      "1971-10-31 00:00:00+00:00\n",
      "1971-11-30 00:00:00+00:00\n",
      "1971-12-31 00:00:00+00:00\n",
      "1972-01-31 00:00:00+00:00\n",
      "1972-02-29 00:00:00+00:00\n",
      "1972-03-31 00:00:00+00:00\n",
      "1972-04-30 00:00:00+00:00\n",
      "1972-05-31 00:00:00+00:00\n",
      "1972-06-30 00:00:00+00:00\n",
      "1972-07-31 00:00:00+00:00\n",
      "1972-08-31 00:00:00+00:00\n",
      "1972-09-30 00:00:00+00:00\n",
      "1972-10-31 00:00:00+00:00\n",
      "1972-11-30 00:00:00+00:00\n",
      "1972-12-31 00:00:00+00:00\n",
      "1973-01-31 00:00:00+00:00\n",
      "1973-02-28 00:00:00+00:00\n",
      "1973-03-31 00:00:00+00:00\n",
      "1973-04-30 00:00:00+00:00\n",
      "1973-05-31 00:00:00+00:00\n",
      "1973-06-30 00:00:00+00:00\n",
      "1973-07-31 00:00:00+00:00\n",
      "1973-08-31 00:00:00+00:00\n",
      "1973-09-30 00:00:00+00:00\n",
      "1973-10-31 00:00:00+00:00\n",
      "1973-11-30 00:00:00+00:00\n",
      "1973-12-31 00:00:00+00:00\n",
      "1974-01-31 00:00:00+00:00\n",
      "1974-02-28 00:00:00+00:00\n",
      "1974-03-31 00:00:00+00:00\n",
      "1974-04-30 00:00:00+00:00\n",
      "1974-05-31 00:00:00+00:00\n",
      "1974-06-30 00:00:00+00:00\n",
      "1974-07-31 00:00:00+00:00\n",
      "1974-08-31 00:00:00+00:00\n",
      "1974-09-30 00:00:00+00:00\n",
      "1974-10-31 00:00:00+00:00\n",
      "1974-11-30 00:00:00+00:00\n",
      "1974-12-31 00:00:00+00:00\n",
      "1975-01-31 00:00:00+00:00\n",
      "1975-02-28 00:00:00+00:00\n",
      "1975-03-31 00:00:00+00:00\n",
      "1975-04-30 00:00:00+00:00\n",
      "1975-05-31 00:00:00+00:00\n",
      "1975-06-30 00:00:00+00:00\n",
      "1975-07-31 00:00:00+00:00\n",
      "1975-08-31 00:00:00+00:00\n",
      "1975-09-30 00:00:00+00:00\n",
      "1975-10-31 00:00:00+00:00\n",
      "1975-11-30 00:00:00+00:00\n",
      "1975-12-31 00:00:00+00:00\n",
      "1976-01-31 00:00:00+00:00\n",
      "1976-02-29 00:00:00+00:00\n",
      "1976-03-31 00:00:00+00:00\n",
      "1976-04-30 00:00:00+00:00\n",
      "1976-05-31 00:00:00+00:00\n",
      "1976-06-30 00:00:00+00:00\n",
      "1976-07-31 00:00:00+00:00\n",
      "1976-08-31 00:00:00+00:00\n",
      "1976-09-30 00:00:00+00:00\n",
      "1976-10-31 00:00:00+00:00\n",
      "1976-11-30 00:00:00+00:00\n",
      "1976-12-31 00:00:00+00:00\n",
      "1977-01-31 00:00:00+00:00\n",
      "1977-02-28 00:00:00+00:00\n",
      "1977-03-31 00:00:00+00:00\n",
      "1977-04-30 00:00:00+00:00\n",
      "1977-05-31 00:00:00+00:00\n",
      "1977-06-30 00:00:00+00:00\n",
      "1977-07-31 00:00:00+00:00\n",
      "1977-08-31 00:00:00+00:00\n",
      "1977-09-30 00:00:00+00:00\n",
      "1977-10-31 00:00:00+00:00\n",
      "1977-11-30 00:00:00+00:00\n",
      "1977-12-31 00:00:00+00:00\n",
      "1978-01-31 00:00:00+00:00\n",
      "1978-02-28 00:00:00+00:00\n",
      "1978-03-31 00:00:00+00:00\n",
      "1978-04-30 00:00:00+00:00\n",
      "1978-05-31 00:00:00+00:00\n",
      "1978-06-30 00:00:00+00:00\n",
      "1978-07-31 00:00:00+00:00\n",
      "1978-08-31 00:00:00+00:00\n",
      "1978-09-30 00:00:00+00:00\n",
      "1978-10-31 00:00:00+00:00\n",
      "1978-11-30 00:00:00+00:00\n",
      "1978-12-31 00:00:00+00:00\n",
      "1979-01-31 00:00:00+00:00\n",
      "1979-02-28 00:00:00+00:00\n",
      "1979-03-31 00:00:00+00:00\n",
      "1979-04-30 00:00:00+00:00\n",
      "1979-05-31 00:00:00+00:00\n",
      "1979-06-30 00:00:00+00:00\n",
      "1979-07-31 00:00:00+00:00\n",
      "1979-08-31 00:00:00+00:00\n",
      "1979-09-30 00:00:00+00:00\n",
      "1979-10-31 00:00:00+00:00\n",
      "1979-11-30 00:00:00+00:00\n",
      "1979-12-31 00:00:00+00:00\n",
      "1980-01-31 00:00:00+00:00\n",
      "1980-02-29 00:00:00+00:00\n",
      "1980-03-31 00:00:00+00:00\n",
      "1980-04-30 00:00:00+00:00\n",
      "1980-05-31 00:00:00+00:00\n",
      "1980-06-30 00:00:00+00:00\n",
      "1980-07-31 00:00:00+00:00\n",
      "1980-08-31 00:00:00+00:00\n",
      "1980-09-30 00:00:00+00:00\n",
      "1980-10-31 00:00:00+00:00\n",
      "1980-11-30 00:00:00+00:00\n",
      "1980-12-31 00:00:00+00:00\n",
      "1981-01-31 00:00:00+00:00\n",
      "1981-02-28 00:00:00+00:00\n",
      "1981-03-31 00:00:00+00:00\n",
      "1981-04-30 00:00:00+00:00\n",
      "1981-05-31 00:00:00+00:00\n",
      "1981-06-30 00:00:00+00:00\n",
      "1981-07-31 00:00:00+00:00\n",
      "1981-08-31 00:00:00+00:00\n",
      "1981-09-30 00:00:00+00:00\n",
      "1981-10-31 00:00:00+00:00\n",
      "1981-11-30 00:00:00+00:00\n",
      "1981-12-31 00:00:00+00:00\n",
      "1982-01-31 00:00:00+00:00\n",
      "1982-02-28 00:00:00+00:00\n",
      "1982-03-31 00:00:00+00:00\n",
      "1982-04-30 00:00:00+00:00\n",
      "1982-05-31 00:00:00+00:00\n",
      "1982-06-30 00:00:00+00:00\n",
      "1982-07-31 00:00:00+00:00\n",
      "1982-08-31 00:00:00+00:00\n",
      "1982-09-30 00:00:00+00:00\n",
      "1982-10-31 00:00:00+00:00\n",
      "1982-11-30 00:00:00+00:00\n",
      "1982-12-31 00:00:00+00:00\n",
      "1983-01-31 00:00:00+00:00\n",
      "1983-02-28 00:00:00+00:00\n",
      "1983-03-31 00:00:00+00:00\n",
      "1983-04-30 00:00:00+00:00\n",
      "1983-05-31 00:00:00+00:00\n",
      "1983-06-30 00:00:00+00:00\n",
      "1983-07-31 00:00:00+00:00\n",
      "1983-08-31 00:00:00+00:00\n",
      "1983-09-30 00:00:00+00:00\n",
      "1983-10-31 00:00:00+00:00\n",
      "1983-11-30 00:00:00+00:00\n",
      "1983-12-31 00:00:00+00:00\n",
      "1984-01-31 00:00:00+00:00\n",
      "1984-02-29 00:00:00+00:00\n",
      "1984-03-31 00:00:00+00:00\n",
      "1984-04-30 00:00:00+00:00\n",
      "1984-05-31 00:00:00+00:00\n",
      "1984-06-30 00:00:00+00:00\n",
      "1984-07-31 00:00:00+00:00\n",
      "1984-08-31 00:00:00+00:00\n",
      "1984-09-30 00:00:00+00:00\n",
      "1984-10-31 00:00:00+00:00\n",
      "1984-11-30 00:00:00+00:00\n",
      "1984-12-31 00:00:00+00:00\n",
      "1985-01-31 00:00:00+00:00\n",
      "1985-02-28 00:00:00+00:00\n",
      "1985-03-31 00:00:00+00:00\n",
      "1985-04-30 00:00:00+00:00\n",
      "1985-05-31 00:00:00+00:00\n",
      "1985-06-30 00:00:00+00:00\n",
      "1985-07-31 00:00:00+00:00\n",
      "1985-08-31 00:00:00+00:00\n",
      "1985-09-30 00:00:00+00:00\n",
      "1985-10-31 00:00:00+00:00\n",
      "1985-11-30 00:00:00+00:00\n",
      "1985-12-31 00:00:00+00:00\n",
      "1986-01-31 00:00:00+00:00\n",
      "1986-02-28 00:00:00+00:00\n",
      "1986-03-31 00:00:00+00:00\n",
      "1986-04-30 00:00:00+00:00\n",
      "1986-05-31 00:00:00+00:00\n",
      "1986-06-30 00:00:00+00:00\n",
      "1986-07-31 00:00:00+00:00\n",
      "1986-08-31 00:00:00+00:00\n",
      "1986-09-30 00:00:00+00:00\n",
      "1986-10-31 00:00:00+00:00\n",
      "1986-11-30 00:00:00+00:00\n",
      "1986-12-31 00:00:00+00:00\n",
      "1987-01-31 00:00:00+00:00\n",
      "1987-02-28 00:00:00+00:00\n",
      "1987-03-31 00:00:00+00:00\n",
      "1987-04-30 00:00:00+00:00\n",
      "1987-05-31 00:00:00+00:00\n",
      "1987-06-30 00:00:00+00:00\n",
      "1987-07-31 00:00:00+00:00\n",
      "1987-08-31 00:00:00+00:00\n",
      "1987-09-30 00:00:00+00:00\n",
      "1987-10-31 00:00:00+00:00\n",
      "1987-11-30 00:00:00+00:00\n",
      "1987-12-31 00:00:00+00:00\n",
      "1988-01-31 00:00:00+00:00\n",
      "1988-02-29 00:00:00+00:00\n",
      "1988-03-31 00:00:00+00:00\n",
      "1988-04-30 00:00:00+00:00\n",
      "1988-05-31 00:00:00+00:00\n",
      "1988-06-30 00:00:00+00:00\n",
      "1988-07-31 00:00:00+00:00\n",
      "1988-08-31 00:00:00+00:00\n",
      "1988-09-30 00:00:00+00:00\n",
      "1988-10-31 00:00:00+00:00\n",
      "1988-11-30 00:00:00+00:00\n",
      "1988-12-31 00:00:00+00:00\n",
      "1989-01-31 00:00:00+00:00\n",
      "1989-02-28 00:00:00+00:00\n",
      "1989-03-31 00:00:00+00:00\n",
      "1989-04-30 00:00:00+00:00\n",
      "1989-05-31 00:00:00+00:00\n",
      "1989-06-30 00:00:00+00:00\n",
      "1989-07-31 00:00:00+00:00\n",
      "1989-08-31 00:00:00+00:00\n",
      "1989-09-30 00:00:00+00:00\n",
      "1989-10-31 00:00:00+00:00\n",
      "1989-11-30 00:00:00+00:00\n",
      "1989-12-31 00:00:00+00:00\n",
      "1990-01-31 00:00:00+00:00\n",
      "1990-02-28 00:00:00+00:00\n",
      "1990-03-31 00:00:00+00:00\n",
      "1990-04-30 00:00:00+00:00\n",
      "1990-05-31 00:00:00+00:00\n",
      "1990-06-30 00:00:00+00:00\n",
      "1990-07-31 00:00:00+00:00\n",
      "1990-08-31 00:00:00+00:00\n",
      "1990-09-30 00:00:00+00:00\n",
      "1990-10-31 00:00:00+00:00\n",
      "1990-11-30 00:00:00+00:00\n",
      "1990-12-31 00:00:00+00:00\n",
      "1991-01-31 00:00:00+00:00\n",
      "1991-02-28 00:00:00+00:00\n",
      "1991-03-31 00:00:00+00:00\n",
      "1991-04-30 00:00:00+00:00\n",
      "1991-05-31 00:00:00+00:00\n",
      "1991-06-30 00:00:00+00:00\n",
      "1991-07-31 00:00:00+00:00\n",
      "1991-08-31 00:00:00+00:00\n",
      "1991-09-30 00:00:00+00:00\n",
      "1991-10-31 00:00:00+00:00\n",
      "1991-11-30 00:00:00+00:00\n",
      "1991-12-31 00:00:00+00:00\n",
      "1992-01-31 00:00:00+00:00\n",
      "1992-02-29 00:00:00+00:00\n",
      "1992-03-31 00:00:00+00:00\n",
      "1992-04-30 00:00:00+00:00\n",
      "1992-05-31 00:00:00+00:00\n",
      "1992-06-30 00:00:00+00:00\n",
      "1992-07-31 00:00:00+00:00\n",
      "1992-08-31 00:00:00+00:00\n",
      "1992-09-30 00:00:00+00:00\n",
      "1992-10-31 00:00:00+00:00\n",
      "1992-11-30 00:00:00+00:00\n",
      "1992-12-31 00:00:00+00:00\n",
      "1993-01-31 00:00:00+00:00\n",
      "1993-02-28 00:00:00+00:00\n",
      "1993-03-31 00:00:00+00:00\n",
      "1993-04-30 00:00:00+00:00\n",
      "1993-05-31 00:00:00+00:00\n",
      "1993-06-30 00:00:00+00:00\n",
      "1993-07-31 00:00:00+00:00\n",
      "1993-08-31 00:00:00+00:00\n",
      "1993-09-30 00:00:00+00:00\n",
      "1993-10-31 00:00:00+00:00\n",
      "1993-11-30 00:00:00+00:00\n",
      "1993-12-31 00:00:00+00:00\n",
      "1994-01-31 00:00:00+00:00\n",
      "1994-02-28 00:00:00+00:00\n",
      "1994-03-31 00:00:00+00:00\n",
      "1994-04-30 00:00:00+00:00\n",
      "1994-05-31 00:00:00+00:00\n",
      "1994-06-30 00:00:00+00:00\n",
      "1994-07-31 00:00:00+00:00\n",
      "1994-08-31 00:00:00+00:00\n",
      "1994-09-30 00:00:00+00:00\n",
      "1994-10-31 00:00:00+00:00\n",
      "1994-11-30 00:00:00+00:00\n",
      "1994-12-31 00:00:00+00:00\n",
      "1995-01-31 00:00:00+00:00\n",
      "1995-02-28 00:00:00+00:00\n",
      "1995-03-31 00:00:00+00:00\n",
      "1995-04-30 00:00:00+00:00\n",
      "1995-05-31 00:00:00+00:00\n",
      "1995-06-30 00:00:00+00:00\n",
      "1995-07-31 00:00:00+00:00\n",
      "1995-08-31 00:00:00+00:00\n",
      "1995-09-30 00:00:00+00:00\n",
      "1995-10-31 00:00:00+00:00\n",
      "1995-11-30 00:00:00+00:00\n",
      "1995-12-31 00:00:00+00:00\n",
      "1996-01-31 00:00:00+00:00\n",
      "1996-02-29 00:00:00+00:00\n",
      "1996-03-31 00:00:00+00:00\n",
      "1996-04-30 00:00:00+00:00\n",
      "1996-05-31 00:00:00+00:00\n",
      "1996-06-30 00:00:00+00:00\n",
      "1996-07-31 00:00:00+00:00\n",
      "1996-08-31 00:00:00+00:00\n",
      "1996-09-30 00:00:00+00:00\n",
      "1996-10-31 00:00:00+00:00\n",
      "1996-11-30 00:00:00+00:00\n",
      "1996-12-31 00:00:00+00:00\n",
      "1997-01-31 00:00:00+00:00\n",
      "1997-02-28 00:00:00+00:00\n",
      "1997-03-31 00:00:00+00:00\n",
      "1997-04-30 00:00:00+00:00\n",
      "1997-05-31 00:00:00+00:00\n",
      "1997-06-30 00:00:00+00:00\n",
      "1997-07-31 00:00:00+00:00\n",
      "1997-08-31 00:00:00+00:00\n",
      "1997-09-30 00:00:00+00:00\n",
      "1997-10-31 00:00:00+00:00\n",
      "1997-11-30 00:00:00+00:00\n",
      "1997-12-31 00:00:00+00:00\n",
      "1998-01-31 00:00:00+00:00\n",
      "1998-02-28 00:00:00+00:00\n",
      "1998-03-31 00:00:00+00:00\n",
      "1998-04-30 00:00:00+00:00\n",
      "1998-05-31 00:00:00+00:00\n",
      "1998-06-30 00:00:00+00:00\n",
      "1998-07-31 00:00:00+00:00\n",
      "1998-08-31 00:00:00+00:00\n",
      "1998-09-30 00:00:00+00:00\n",
      "1998-10-31 00:00:00+00:00\n",
      "1998-11-30 00:00:00+00:00\n",
      "1998-12-31 00:00:00+00:00\n",
      "1999-01-31 00:00:00+00:00\n",
      "1999-02-28 00:00:00+00:00\n",
      "1999-03-31 00:00:00+00:00\n",
      "1999-04-30 00:00:00+00:00\n",
      "1999-05-31 00:00:00+00:00\n",
      "1999-06-30 00:00:00+00:00\n",
      "1999-07-31 00:00:00+00:00\n",
      "1999-08-31 00:00:00+00:00\n",
      "1999-09-30 00:00:00+00:00\n",
      "1999-10-31 00:00:00+00:00\n",
      "1999-11-30 00:00:00+00:00\n",
      "1999-12-31 00:00:00+00:00\n",
      "2000-01-31 00:00:00+00:00\n",
      "2000-02-29 00:00:00+00:00\n",
      "2000-03-31 00:00:00+00:00\n",
      "2000-04-30 00:00:00+00:00\n",
      "2000-05-31 00:00:00+00:00\n",
      "2000-06-30 00:00:00+00:00\n",
      "2000-07-31 00:00:00+00:00\n",
      "2000-08-31 00:00:00+00:00\n",
      "2000-09-30 00:00:00+00:00\n",
      "2000-10-31 00:00:00+00:00\n",
      "2000-11-30 00:00:00+00:00\n",
      "2000-12-31 00:00:00+00:00\n",
      "2001-01-31 00:00:00+00:00\n",
      "2001-02-28 00:00:00+00:00\n",
      "2001-03-31 00:00:00+00:00\n",
      "2001-04-30 00:00:00+00:00\n",
      "2001-05-31 00:00:00+00:00\n",
      "2001-06-30 00:00:00+00:00\n",
      "2001-07-31 00:00:00+00:00\n",
      "2001-08-31 00:00:00+00:00\n",
      "2001-09-30 00:00:00+00:00\n",
      "2001-10-31 00:00:00+00:00\n",
      "2001-11-30 00:00:00+00:00\n",
      "2001-12-31 00:00:00+00:00\n",
      "2002-01-31 00:00:00+00:00\n",
      "2002-02-28 00:00:00+00:00\n",
      "2002-03-31 00:00:00+00:00\n",
      "2002-04-30 00:00:00+00:00\n",
      "2002-05-31 00:00:00+00:00\n",
      "2002-06-30 00:00:00+00:00\n",
      "2002-07-31 00:00:00+00:00\n",
      "2002-08-31 00:00:00+00:00\n",
      "2002-09-30 00:00:00+00:00\n",
      "2002-10-31 00:00:00+00:00\n",
      "2002-11-30 00:00:00+00:00\n",
      "2002-12-31 00:00:00+00:00\n",
      "2003-01-31 00:00:00+00:00\n",
      "2003-02-28 00:00:00+00:00\n",
      "2003-03-31 00:00:00+00:00\n",
      "2003-04-30 00:00:00+00:00\n",
      "2003-05-31 00:00:00+00:00\n",
      "2003-06-30 00:00:00+00:00\n",
      "2003-07-31 00:00:00+00:00\n",
      "2003-08-31 00:00:00+00:00\n",
      "2003-09-30 00:00:00+00:00\n",
      "2003-10-31 00:00:00+00:00\n",
      "2003-11-30 00:00:00+00:00\n",
      "2003-12-31 00:00:00+00:00\n",
      "2004-01-31 00:00:00+00:00\n",
      "2004-02-29 00:00:00+00:00\n",
      "2004-03-31 00:00:00+00:00\n",
      "2004-04-30 00:00:00+00:00\n",
      "2004-05-31 00:00:00+00:00\n",
      "2004-06-30 00:00:00+00:00\n",
      "2004-07-31 00:00:00+00:00\n",
      "2004-08-31 00:00:00+00:00\n",
      "2004-09-30 00:00:00+00:00\n",
      "2004-10-31 00:00:00+00:00\n",
      "2004-11-30 00:00:00+00:00\n",
      "2004-12-31 00:00:00+00:00\n",
      "2005-01-31 00:00:00+00:00\n",
      "2005-02-28 00:00:00+00:00\n",
      "2005-03-31 00:00:00+00:00\n",
      "2005-04-30 00:00:00+00:00\n",
      "2005-05-31 00:00:00+00:00\n",
      "2005-06-30 00:00:00+00:00\n",
      "2005-07-31 00:00:00+00:00\n",
      "2005-08-31 00:00:00+00:00\n",
      "2005-09-30 00:00:00+00:00\n",
      "2005-10-31 00:00:00+00:00\n",
      "2005-11-30 00:00:00+00:00\n",
      "2005-12-31 00:00:00+00:00\n",
      "2006-01-31 00:00:00+00:00\n",
      "2006-02-28 00:00:00+00:00\n",
      "2006-03-31 00:00:00+00:00\n",
      "2006-04-30 00:00:00+00:00\n",
      "2006-05-31 00:00:00+00:00\n",
      "2006-06-30 00:00:00+00:00\n",
      "2006-07-31 00:00:00+00:00\n",
      "2006-08-31 00:00:00+00:00\n",
      "2006-09-30 00:00:00+00:00\n",
      "2006-10-31 00:00:00+00:00\n",
      "2006-11-30 00:00:00+00:00\n",
      "2006-12-31 00:00:00+00:00\n",
      "2007-01-31 00:00:00+00:00\n",
      "2007-02-28 00:00:00+00:00\n",
      "2007-03-31 00:00:00+00:00\n",
      "2007-04-30 00:00:00+00:00\n",
      "2007-05-31 00:00:00+00:00\n",
      "2007-06-30 00:00:00+00:00\n",
      "2007-07-31 00:00:00+00:00\n",
      "2007-08-31 00:00:00+00:00\n",
      "2007-09-30 00:00:00+00:00\n",
      "2007-10-31 00:00:00+00:00\n",
      "2007-11-30 00:00:00+00:00\n",
      "2007-12-31 00:00:00+00:00\n",
      "2008-01-31 00:00:00+00:00\n",
      "2008-02-29 00:00:00+00:00\n",
      "2008-03-31 00:00:00+00:00\n",
      "2008-04-30 00:00:00+00:00\n",
      "2008-05-31 00:00:00+00:00\n",
      "2008-06-30 00:00:00+00:00\n",
      "2008-07-31 00:00:00+00:00\n",
      "2008-08-31 00:00:00+00:00\n",
      "2008-09-30 00:00:00+00:00\n",
      "2008-10-31 00:00:00+00:00\n",
      "2008-11-30 00:00:00+00:00\n",
      "2008-12-31 00:00:00+00:00\n",
      "2009-01-31 00:00:00+00:00\n",
      "2009-02-28 00:00:00+00:00\n",
      "2009-03-31 00:00:00+00:00\n",
      "2009-04-30 00:00:00+00:00\n",
      "2009-05-31 00:00:00+00:00\n",
      "2009-06-30 00:00:00+00:00\n",
      "2009-07-31 00:00:00+00:00\n",
      "2009-08-31 00:00:00+00:00\n",
      "2009-09-30 00:00:00+00:00\n",
      "2009-10-31 00:00:00+00:00\n",
      "2009-11-30 00:00:00+00:00\n",
      "2009-12-31 00:00:00+00:00\n",
      "2010-01-31 00:00:00+00:00\n",
      "2010-02-28 00:00:00+00:00\n",
      "2010-03-31 00:00:00+00:00\n",
      "2010-04-30 00:00:00+00:00\n",
      "2010-05-31 00:00:00+00:00\n",
      "2010-06-30 00:00:00+00:00\n",
      "2010-07-31 00:00:00+00:00\n",
      "2010-08-31 00:00:00+00:00\n",
      "2010-09-30 00:00:00+00:00\n",
      "2010-10-31 00:00:00+00:00\n",
      "2010-11-30 00:00:00+00:00\n",
      "2010-12-31 00:00:00+00:00\n",
      "2011-01-31 00:00:00+00:00\n",
      "2011-02-28 00:00:00+00:00\n",
      "2011-03-31 00:00:00+00:00\n",
      "2011-04-30 00:00:00+00:00\n",
      "2011-05-31 00:00:00+00:00\n",
      "2011-06-30 00:00:00+00:00\n",
      "2011-07-31 00:00:00+00:00\n",
      "2011-08-31 00:00:00+00:00\n",
      "2011-09-30 00:00:00+00:00\n",
      "2011-10-31 00:00:00+00:00\n",
      "2011-11-30 00:00:00+00:00\n",
      "2011-12-31 00:00:00+00:00\n",
      "2012-01-31 00:00:00+00:00\n",
      "2012-02-29 00:00:00+00:00\n",
      "2012-03-31 00:00:00+00:00\n",
      "2012-04-30 00:00:00+00:00\n",
      "2012-05-31 00:00:00+00:00\n",
      "2012-06-30 00:00:00+00:00\n",
      "2012-07-31 00:00:00+00:00\n",
      "2012-08-31 00:00:00+00:00\n",
      "2012-09-30 00:00:00+00:00\n",
      "2012-10-31 00:00:00+00:00\n",
      "2012-11-30 00:00:00+00:00\n",
      "2012-12-31 00:00:00+00:00\n",
      "2013-01-31 00:00:00+00:00\n",
      "2013-02-28 00:00:00+00:00\n",
      "2013-03-31 00:00:00+00:00\n",
      "2013-04-30 00:00:00+00:00\n",
      "2013-05-31 00:00:00+00:00\n",
      "2013-06-30 00:00:00+00:00\n",
      "2013-07-31 00:00:00+00:00\n",
      "2013-08-31 00:00:00+00:00\n",
      "2013-09-30 00:00:00+00:00\n",
      "2013-10-31 00:00:00+00:00\n",
      "2013-11-30 00:00:00+00:00\n",
      "2013-12-31 00:00:00+00:00\n",
      "2014-01-31 00:00:00+00:00\n",
      "2014-02-28 00:00:00+00:00\n",
      "2014-03-31 00:00:00+00:00\n",
      "2014-04-30 00:00:00+00:00\n",
      "2014-05-31 00:00:00+00:00\n",
      "2014-06-30 00:00:00+00:00\n",
      "2014-07-31 00:00:00+00:00\n",
      "2014-08-31 00:00:00+00:00\n",
      "2014-09-30 00:00:00+00:00\n",
      "2014-10-31 00:00:00+00:00\n",
      "2014-11-30 00:00:00+00:00\n",
      "2014-12-31 00:00:00+00:00\n",
      "2015-01-31 00:00:00+00:00\n",
      "2015-02-28 00:00:00+00:00\n",
      "2015-03-31 00:00:00+00:00\n",
      "2015-04-30 00:00:00+00:00\n",
      "2015-05-31 00:00:00+00:00\n",
      "2015-06-30 00:00:00+00:00\n",
      "2015-07-31 00:00:00+00:00\n",
      "2015-08-31 00:00:00+00:00\n",
      "2015-09-30 00:00:00+00:00\n",
      "2015-10-31 00:00:00+00:00\n",
      "2015-11-30 00:00:00+00:00\n",
      "2015-12-31 00:00:00+00:00\n",
      "2016-01-31 00:00:00+00:00\n",
      "2016-02-29 00:00:00+00:00\n",
      "2016-03-31 00:00:00+00:00\n",
      "2016-04-30 00:00:00+00:00\n",
      "2016-05-31 00:00:00+00:00\n",
      "2016-06-30 00:00:00+00:00\n",
      "2016-07-31 00:00:00+00:00\n",
      "2016-08-31 00:00:00+00:00\n",
      "2016-09-30 00:00:00+00:00\n",
      "2016-10-31 00:00:00+00:00\n",
      "2016-11-30 00:00:00+00:00\n",
      "2016-12-31 00:00:00+00:00\n",
      "2017-01-31 00:00:00+00:00\n",
      "2017-02-28 00:00:00+00:00\n",
      "2017-03-31 00:00:00+00:00\n",
      "2017-04-30 00:00:00+00:00\n",
      "2017-05-31 00:00:00+00:00\n",
      "2017-06-30 00:00:00+00:00\n",
      "2017-07-31 00:00:00+00:00\n",
      "2017-08-31 00:00:00+00:00\n",
      "2017-09-30 00:00:00+00:00\n",
      "2017-10-31 00:00:00+00:00\n",
      "2017-11-30 00:00:00+00:00\n",
      "2017-12-31 00:00:00+00:00\n",
      "2018-01-31 00:00:00+00:00\n",
      "2018-02-28 00:00:00+00:00\n",
      "2018-03-31 00:00:00+00:00\n",
      "2018-04-30 00:00:00+00:00\n",
      "2018-05-31 00:00:00+00:00\n",
      "2018-06-30 00:00:00+00:00\n",
      "2018-07-31 00:00:00+00:00\n",
      "2018-08-31 00:00:00+00:00\n",
      "2018-09-30 00:00:00+00:00\n",
      "2018-10-31 00:00:00+00:00\n",
      "2018-11-30 00:00:00+00:00\n",
      "2018-12-31 00:00:00+00:00\n",
      "2019-01-31 00:00:00+00:00\n",
      "2019-02-28 00:00:00+00:00\n",
      "2019-03-31 00:00:00+00:00\n",
      "2019-04-30 00:00:00+00:00\n",
      "2019-05-31 00:00:00+00:00\n",
      "2019-06-30 00:00:00+00:00\n",
      "2019-07-31 00:00:00+00:00\n",
      "2019-08-31 00:00:00+00:00\n",
      "2019-09-30 00:00:00+00:00\n",
      "2019-10-31 00:00:00+00:00\n",
      "2019-11-30 00:00:00+00:00\n",
      "2019-12-31 00:00:00+00:00\n",
      "2020-01-31 00:00:00+00:00\n",
      "2020-02-29 00:00:00+00:00\n",
      "2020-03-31 00:00:00+00:00\n",
      "2020-04-30 00:00:00+00:00\n",
      "2020-05-31 00:00:00+00:00\n",
      "2020-06-30 00:00:00+00:00\n",
      "2020-07-31 00:00:00+00:00\n",
      "2020-08-31 00:00:00+00:00\n",
      "2020-09-30 00:00:00+00:00\n",
      "2020-10-31 00:00:00+00:00\n",
      "2020-11-30 00:00:00+00:00\n"
     ]
    }
   ],
   "source": [
    "dau_df[\"event_time\"]  = pd.to_datetime(dau_df[\"event_time\"])\n",
    "res = dau_df.groupby(pd.Grouper(key=\"event_time\",freq=\"1M\", origin=\"start\"))#[\"user_id\"].unique().count()\n",
    "for (group, value) in res:\n",
    "    print(group)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "179ee5d5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "python3.6",
   "language": "python",
   "name": "python3.6"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
